Code adapted from: https://www.kaggle.com/arnavkj95/candidate-generation-and-luna16-preprocessing
Now we will do the segmentation, but using Python instead of R.
First we need to import some Python libraries.
import numpy as np # pip3 install numpy
import pandas as pd # pip3 install pandas
# pip3 install matplotlib
# pip3 install scipy
import skimage # pip3 install scikit-image
import os
from skimage.morphology import ball, disk, dilation, binary_erosion, remove_small_objects, erosion, closing, reconstruction, binary_closing
from skimage.measure import label,regionprops, perimeter
from skimage.morphology import binary_dilation, binary_opening
from skimage.filters import roberts, sobel
from skimage import measure, feature
from skimage.segmentation import clear_border
from skimage import data
from scipy import ndimage as ndi
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import dicom # pip3 install dicom
import scipy.misc
import pydicom # pip3 install pydicom
import matplotlib.pyplot as plt
Each scan consists of multiple slices. All the DICOM images from the scan are stored in one folder; in path_images we indicate the path of that folder.
# check_output is only used by the optional directory-listing check shown below.
from subprocess import check_output
# Folder that holds all DICOM slices of a single scan; adjust it to the
# location of your local copy of the data.
path_images = "/Users/andrealetaalfonso/Desktop/TFG/images/Kaggle/ID00184637202242062969203/"
# You can check that everything is loading correctly with: print(check_output(["ls", path_images]).decode("utf8"))
To read the images, we will use the function pydicom.read_file(). Then we will replace every intensity value of -2000 with 0. These pixels are the ones located outside the scanner bounds.
# pip3 install nltk==3.6.2
# Load one slice of the scan (file 20.dcm) and display it as a quick check.
# dcmread() is pydicom's current reader; read_file() is the legacy alias,
# which is deprecated and not provided by newer pydicom releases.
lung = pydicom.dcmread("/Users/andrealetaalfonso/Desktop/TFG/images/Kaggle/ID00184637202242062969203/20.dcm")
# NOTE(review): the name `slice` shadows the Python builtin of the same name;
# it is kept unchanged because code outside this section may refer to it.
slice = lung.pixel_array
plt.axis('off')
# Pixels outside the scanner bounds are stored as -2000; set them to 0.
slice[slice == -2000] = 0
plt.imshow(slice, cmap=plt.cm.gray)
plt.show()
We create a function file_is_hidden() so that we skip the hidden files in the folder (files that are not normally visible on our computers) and read only the .dcm files.
if os.name == 'nt':
    # pywin32 is only needed (and only importable) on Windows.
    import win32api
    import win32con


def file_is_hidden(p):
    """Return True if the file named by *p* is hidden, False otherwise.

    On Windows the file's attributes are queried through
    ``win32api.GetFileAttributes`` and the file counts as hidden when it has
    the HIDDEN or SYSTEM attribute. On every other platform a file is hidden
    when its name (the last path component) starts with a dot.

    Args:
        p: File name or path. On Windows it must be resolvable by
            ``GetFileAttributes``, i.e. a full path or a name relative to the
            current working directory.

    Returns:
        bool: whether the file is considered hidden.
    """
    if os.name == 'nt':
        attribute = win32api.GetFileAttributes(p)
        hidden_mask = (win32con.FILE_ATTRIBUTE_HIDDEN
                       | win32con.FILE_ATTRIBUTE_SYSTEM)
        # Normalise the raw bitmask to a real boolean.
        return bool(attribute & hidden_mask)
    # linux-osx: look only at the final path component, so that
    # "/some/dir/.DS_Store" is hidden and "./20.dcm" is not. Trailing
    # separators are stripped first so ".git/" is still recognised.
    return os.path.basename(p.rstrip(os.sep)).startswith('.')
# Names of the entries in the scan folder that are not hidden.
file_list = list(
    filter(lambda name: not file_is_hidden(name), os.listdir(path_images))
)
Now we will read all the images from a folder with a function named read_ct_scan(folder_name).
def read_ct_scan(folder_name):
    """Load every DICOM slice in *folder_name* into one stacked array.

    Hidden files (as reported by ``file_is_hidden`` for the bare file name)
    are skipped; every other entry of the folder is read as a DICOM file.
    The slices are ordered by their ``InstanceNumber`` and their pixel
    arrays are stacked along a new first axis. Values equal to -2000 are
    replaced by 0.

    Args:
        folder_name: Path of the folder containing the slices. A trailing
            path separator is optional.

    Returns:
        numpy.ndarray: the stacked pixel arrays, one entry along the first
        axis per slice.

    Raises:
        ValueError: raised by ``np.stack`` when the folder yields no slices.
    """
    # Read the slices from the dicom files. os.path.join makes the path
    # correct whether or not folder_name ends with a separator; dcmread is
    # the current name of pydicom's reader (read_file is the legacy alias).
    slices = [
        pydicom.dcmread(os.path.join(folder_name, filename))
        for filename in os.listdir(folder_name)
        if not file_is_hidden(filename)
    ]
    # Sort the dicom slices in their respective order
    slices.sort(key=lambda x: int(x.InstanceNumber))
    # Get the pixel values for all the slices
    slices = np.stack([s.pixel_array for s in slices])
    slices[slices == -2000] = 0
    return slices
# Load the whole scan stored in path_images as one stacked array of slices.
ct_scan = read_ct_scan(path_images)
Plot some of the images from a folder.
def plot_ct_scan(scan):
    """Draw every fifth slice of *scan* on a grid with four images per row.

    Each grid row covers 20 consecutive slices (indices 0, 5, 10 and 15 of
    that group). The figure is only built here; call ``plt.show()`` to
    display it.

    Args:
        scan: Array of slices indexed along its first axis; ``scan.shape[0]``
            is the number of slices and ``scan[i]`` a single 2-D image.
    """
    n_rows = scan.shape[0] // 20 + 1
    # squeeze=False keeps `plots` two-dimensional even when there is only one
    # row; otherwise the [row, column] indexing below fails for short scans.
    _fig, plots = plt.subplots(n_rows, 4, figsize=(25, 25), squeeze=False)
    # Hide the axes of every cell up front so that cells without an image do
    # not show an empty frame.
    for ax in plots.ravel():
        ax.axis('off')
    for i in range(0, scan.shape[0], 5):
        row, offset = divmod(i, 20)
        plots[row, offset // 5].imshow(scan[i], cmap=plt.cm.gray)
# Build the overview grid for the loaded scan, then display the figure.
plot_ct_scan(ct_scan)
plt.show()